package com.ymorning.common.utils;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.ymorning.common.entity.Cache;

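/**
 * GetUrlContext.java
 *
 * @descriptions Downloads news list pages, picks the article links dated today
 *               and collects the content of each linked article.
 * @author Dylan
 * @contact taodizhou@foxmail.com
 * @date 2013-11-3 11:17:17 AM
 * @supportSite www.ymorning.com
 */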

public class GetUrlContext {
	// List pages to scrape, one per news column.
	private static final String CHINA_IT_URL = "http://115.182.21.32:8080/ccidnetPaging/news_list_2013.jsp?columnid=945"; // industry - domestic news
	private static final String ADJ_IT_URL = "http://115.182.21.32:8080/ccidnetPaging/news_list_2013.jsp?columnid=946"; // industry - international news
	private static final String NET_CHINA_IT_URL = "http://115.182.21.32:8080/ccidnetPaging/news_list_2013.jsp?columnid=11097"; // internet - domestic news
	private static final String NET_ADJ_IT_URL = "http://115.182.21.32:8080/ccidnetPaging/news_list_2013.jsp?columnid=11099"; // internet - international news

	/** Charset used to decode every downloaded page. */
	private static final String PAGE_CHARSET = "GB2312";
	/** Separator between link, title and mark in the entries returned by {@link #filterhtml(String)}. */
	private static final String FIELD_SEPARATOR = "#split";
	/** Separator between the marks stored in one cache value. */
	private static final String MARK_SEPARATOR = "_";
	/** Date pattern that must appear inside a link for the article to count as published today. */
	private static final String LINK_DATE_PATTERN = "yyyyMMdd";
	/** Pattern for the collection timestamp printed by {@link #main(String[])} (24-hour clock). */
	private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss";

	/**
	 * Downloads the raw content of a page.
	 * <p>
	 * The page is decoded as GB2312 and every line is terminated with CRLF.
	 * Failures are reported on stderr; for backward compatibility the method
	 * still returns the text {@code "error open url" + httpUrl} instead of throwing.
	 *
	 * @param httpUrl address of the page to download
	 * @return the page content, or the error text described above when the page cannot be read
	 */
	public static String getPageContext(String httpUrl) {
		BufferedReader br = null;
		try {
			URL url = new URL(httpUrl);
			br = new BufferedReader(new InputStreamReader(url.openStream(), PAGE_CHARSET));
			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = br.readLine()) != null) {
				sb.append(line).append("\r\n");
			}
			return sb.toString();
		} catch (Exception e) {
			// Best-effort scraper: keep the historical return value, but do not hide the cause.
			System.err.println("error open url" + httpUrl + ": " + e);
			return "error open url" + httpUrl;
		} finally {
			// Close in finally so the connection is released even when reading fails.
			if (br != null) {
				try {
					br.close();
				} catch (Exception ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
		}
	}

	/**
	 * Filters the raw HTML of a list page down to the news links dated today.
	 * <p>
	 * Uses jsoup (http://www.open-open.com/jsoup) to select the elements with
	 * class {@code font10}. A link is kept when its {@code href} contains today's
	 * date ({@code yyyyMMdd}) and a non-empty mark can be cut out of it (the text
	 * between the last {@code /} and the last {@code _}).
	 *
	 * @param mark column selector: {@code "0"}..{@code "3"}; any other value (including
	 *             {@code null}) yields an empty list
	 * @return one entry per news item, formatted as {@code link#splittitle#splitmark}
	 */
	public static List<String> filterhtml(String mark) {
		List<String> list = new ArrayList<String>();
		String listUrl = listUrlFor(mark);
		if (listUrl == null) {
			return list;
		}
		// SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
		String today = new SimpleDateFormat(LINK_DATE_PATTERN).format(new Date());
		Document doc = Jsoup.parse(getPageContext(listUrl));
		for (Element link : doc.getElementsByClass("font10")) {
			String newLink = link.attr("href");
			if (newLink.indexOf(today) <= 0) {
				continue;
			}
			int markStart = newLink.lastIndexOf('/') + 1;
			int markEnd = newLink.lastIndexOf('_');
			if (markEnd <= markStart) {
				// No usable mark in this link; skipping avoids a substring failure here
				// and a malformed (two-field) entry further down the pipeline.
				continue;
			}
			String newMark = newLink.substring(markStart, markEnd);
			list.add(newLink + FIELD_SEPARATOR + link.text() + FIELD_SEPARATOR + newMark);
		}
		return list;
	}

	/**
	 * Downloads every news item found by {@link #filterhtml(String)} and collects its content.
	 * <p>
	 * For each article that has at least one element with class {@code temp}, the
	 * article mark is recorded in the cache under the key {@code "news_" + mark},
	 * and one result entry is produced per such element.
	 *
	 * @param mark column selector, see {@link #filterhtml(String)}
	 * @return entries formatted as {@code title#concontent}
	 */
	public static List<String> getAllNews(String mark) {
		List<String> alist = filterhtml(mark);
		System.out.println("该次采集共 "+alist.size() +" 条 ");
		List<String> list = new ArrayList<String>();
		String key = "news_" + mark;
		for (String entry : alist) {
			// Cut at the first and last separator so a title containing "#split"
			// cannot shift the fields.
			int firstSep = entry.indexOf(FIELD_SEPARATOR);
			int lastSep = entry.lastIndexOf(FIELD_SEPARATOR);
			if (firstSep < 0 || lastSep == firstSep) {
				continue;
			}
			String newLink = entry.substring(0, firstSep);
			String newTitle = entry.substring(firstSep + FIELD_SEPARATOR.length(), lastSep);
			String newMark = entry.substring(lastSep + FIELD_SEPARATOR.length());

			Document doc = Jsoup.parse(getPageContext(newLink));
			Elements eContext = doc.getElementsByClass("temp");
			if (eContext.isEmpty()) {
				continue;
			}
			// Record the mark once per article (only when content was actually found).
			rememberCollectedMark(key, newMark);
			for (Element context : eContext) {
				// NOTE(review): "【.*】" is greedy and spans from the first 【 to the last 】
				// on a line; kept as-is because the intended scope is not visible here.
				String newContext = context.html().replaceAll(">\\s*<","><").replaceAll("【.*】","【ymorning资讯】");
				list.add(newTitle + "#con" + newContext);
			}
		}
		return list;
	}

	/**
	 * Command-line entry point: collects column "1" and prints the result count and the time.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		System.out.println("成功采集共 "+getAllNews("1").size()+" 条");
		System.out.println("采集时间  " + new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date()));
	}

	/** Maps a column selector to its list page URL; returns null for an unknown or null selector. */
	private static String listUrlFor(String mark) {
		if ("0".equals(mark)) {
			return CHINA_IT_URL;
		} else if ("1".equals(mark)) {
			return ADJ_IT_URL;
		} else if ("2".equals(mark)) {
			return NET_CHINA_IT_URL;
		} else if ("3".equals(mark)) {
			return NET_ADJ_IT_URL;
		}
		return null;
	}

	/** Adds newMark to the "_"-separated mark list cached under key, keeping marks already present. */
	private static void rememberCollectedMark(String key, String newMark) {
		String existing = "";
		if (CacheManager.hasCache(key)) {
			Cache cached = CacheManager.getCache(key);
			if (cached != null && cached.getValue() != null) {
				existing = cached.getValue() + "";
			}
		}

		String value;
		if (existing.length() == 0 || "null".equals(existing)) {
			value = newMark;
		} else if ((MARK_SEPARATOR + existing + MARK_SEPARATOR)
				.indexOf(MARK_SEPARATOR + newMark + MARK_SEPARATOR) >= 0) {
			// Already recorded: keep the cached list untouched. Matching on delimiters
			// prevents "12" from being mistaken for part of "123".
			value = existing;
		} else {
			value = existing + MARK_SEPARATOR + newMark;
		}

		Cache cache = new Cache();
		cache.setKey(key);
		cache.setValue(value);
		CacheManager.putCache(key, cache);
	}

}
